#########################################################
# Diff-in-Diff and Panel Analysis of NVA Service Effects                      #
#########################################################
### Libraries
library(ggplot2)
library(foreign)
library(data.table)
library(stargazer)
library(lfe)
library(xtable)
library(tidyr)
library(dplyr)
library(broom)
library(readxl)


#####################################################################
#####################################################################
## 1. Load Data
#####################################################################
#####################################################################

# Diff-in-Diff Data
load("kaderdaten_panel_full.rda")
data <- master_pkz_year_panel
rm(master_pkz_year_panel)


# create female variable
data$female <- ifelse(data$gender_recode=="female",1,0)
# create male variable
data$male <- ifelse(data$gender_recode=="female",0,1)

# create conscription timing variable
data$post1962 <- ifelse(data$year>1961,1,0)

# create time trends variable
setDT(data)
data[, time_trend := year - min(year, na.rm = TRUE)]
data$time_trend2 <- data$time_trend^2

data$age_cohort_factor <- factor(data$age_1962)
data$year_factor <- factor(data$year)

# generate some more comparison vars
data$economic_elite <- ifelse(data$social_history_recode=="economic_elite",1,0)
data$intelligence <- ifelse(data$social_history_recode=="intelligence",1,0)
data$worker_farmer <- ifelse(data$social_history_recode=="worker_farmer",1,0)

data$no_relatives_abroad <- ifelse(data$relatives_abroad_recode=="no_relatives_abroad",1,0)
data$no_returnee <- ifelse(data$returnee_from_brd_recode=="no_returnee",1,0)
data$father_kpd_sed <- ifelse(data$father_party_before1945_recode=="KPD"|data$father_party_before1945_recode=="SED",1,0)
data$mother_kpd_sed <- ifelse(data$mother_party_before1945_recode=="KPD"|data$mother_party_before1945_recode=="SED",1,0)



## RDD data
# additional background vars
data$relative_abroad_dum <- ifelse(data$relatives_abroad_recode!="no_relatives_abroad",1,0)
data$returnee_from_brd_dum <- ifelse(data$returnee_from_brd_recode!="no_returnee",1,0)
data$mother_party_before1945_dum <- ifelse(data$mother_party_before1945_recode!="no_party",1,0)
data$father_party_before1945_dum <- ifelse(data$father_party_before1945_recode!="no_party",1,0)

###### Create data sets for analyses
data_rd_25_26 <- data[data$age_1962>-1 & data$age_1962<51 & data$age_1962!=25 &data$age_1962!=26,]




#####################################################################
#####################################################################
## Appendix Figure A.2
#####################################################################
#####################################################################


nva_recruitment_wenzke <- readxl::read_excel("./data/Conscription in 1962.xlsx")

nva_recruitment_wenzke <- nva_recruitment_wenzke %>% 
  gather(key = type, value, -Cohort) %>% 
  mutate(type = forcats::as_factor(type))


reg_exam_wenzke <- ggplot(nva_recruitment_wenzke, aes(x = Cohort, y = value, group = type, 
                                                      fill = type)) +
  geom_bar(stat = "identity", 
           position = position_dodge(), 
           color = "black", width = 0.7, size = 0.3) +
  scale_fill_brewer("", palette = "Greys", direction = 1) +
  theme_bw() + 
  labs(x = "", y = "") + 
  scale_x_continuous(breaks = 1931:1945) +
  theme(panel.grid = element_blank(), 
        legend.position = "bottom")

ggsave(reg_exam_wenzke, filename = "./figures/registration_examination_1962.pdf", 
       width = 10, height = 5, scale = 0.8)


#####################################################################
#####################################################################
## Appendix Figure C.2
#####################################################################
#####################################################################

# based on 25 vs. 26 
# subset data to men
data_diff <- data[data$female==0,]
# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>24,]
data_diff <- data_diff[data_diff$age_1962<27,]

# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)
data_diff$Cohort <- as.factor(ifelse(data_diff$cohort_binary==1,"Treated","Control"))
data_diff$Cohort <- factor(data_diff$Cohort,levels(data_diff$Cohort)[c(2,1)])  

p11 <- ggplot(data_diff,aes(x=age,y=salheiser_positions_correct,group=Cohort))+
  geom_smooth(aes(linetype=Cohort, 
                  fill = Cohort) ,
              size=0.5,color="black")+
  theme_bw()+
  theme(panel.grid =element_blank(), 
        legend.position = "bottom") +
  scale_fill_manual(values = c("#252525", "#bdbdbd")) +
  # theme(axis.text=element_text(size=15),axis.title=element_text(size=15))+
  xlab("Age")+ylab("Rank")+
  xlim(c(17,30))+
  geom_vline(xintercept=25)+
  annotate("text", x = 26, y = 1.6, label = "Start of NVA Service",size=3)
#annotate("rect", xmin = 18, xmax = 25, ymin = -Inf, ymax = Inf, fill = "grey", alpha = .25, color = NA)+
#annotate("text", x = 26.5, y = 0.7, label = "Start of NVA Service",size=3)
ggsave(p11, filename = "age_rank_plot_new_narrow.pdf")

p12 <- ggplot(data_diff,aes(x=age,y=nc_dummy,group=Cohort))+
  geom_smooth(aes(linetype=Cohort, 
                  fill = Cohort) ,
              size=0.5,color="black")+
  theme_bw()+
  theme(panel.grid =element_blank(), 
        legend.position = "bottom") +
  scale_fill_manual(values = c("#252525", "#bdbdbd")) +
  # theme(axis.text=element_text(size=15),axis.title=element_text(size=15))+
  xlab("Age")+ylab("Nomenclature")+
  xlim(c(17,30))+
  geom_vline(xintercept=25)+
  annotate("text", x = 26, y = 0.4, label = "Start of NVA Service",size=3)
#annotate("rect", xmin = 18, xmax = 25, ymin = -Inf, ymax = Inf, fill = "grey", alpha = .25, color = NA)+
#annotate("text", x = 26.5, y = 0.15, label = "Start of NVA Service",size=3)
ggsave(p12, filename = "age_nc_plot_new_narrow.pdf")



###############################################
###############################################
###  Table C.1
###############################################
###############################################

#### Table 18-25 vs. 26-33
# subset data to men
data_diff <- data[data$female==0,]
# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>17,]
data_diff <- data_diff[data_diff$age_1962<34,]

# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)

# pick everyone at age=18
#data_diff <- data_diff[data_diff$age==18,]

# generate some more comparison vars
data_diff$economic_elite <- ifelse(data_diff$social_history_recode=="economic_elite",1,0)
data_diff$intelligence <- ifelse(data_diff$social_history_recode=="intelligence",1,0)
data_diff$worker_farmer <- ifelse(data_diff$social_history_recode=="worker_farmer",1,0)

data_diff$no_relatives_abroad <- ifelse(data_diff$relatives_abroad_recode=="no_relatives_abroad",1,0)
data_diff$no_returnee <- ifelse(data_diff$returnee_from_brd_recode=="no_returnee",1,0)
data_diff$father_kpd_sed <- ifelse(data_diff$father_party_before1945_recode=="KPD"|data_diff$father_party_before1945_recode=="SED",1,0)
data_diff$mother_kpd_sed <- ifelse(data_diff$mother_party_before1945_recode=="KPD"|data_diff$mother_party_before1945_recode=="SED",1,0)



data_diff_balance <- data_diff[,c("cohort_binary","party_sed","fdj_yes","org_count","number_of_vol_functions","education_rank","number_of_languages","no_relatives_abroad","no_returnee","father_kpd_sed","mother_kpd_sed","worker_farmer","intelligence","economic_elite")]
data_diff_balance <- na.omit(data_diff_balance)

balance_table <- balanceTable(data_diff_balance,treatment="cohort_binary")

varnames <- c("SED","FDJ","Organization Membership Count","Voluntary Functions","Education Rank","Number of Languages","No Relatives Abroad","No Returnee","Father KPD/SED","Mother KPD/SED","Worker/Farmer","Intelligentsia","Economic Elite")

balance_table <- data.frame(varnames,balance_table)

colnames(balance_table) <- c("Variable","Treated Mean","Control Mean","Std Difference","T-test p-value","Fisher/Wilcox p-value")

print(xtable(balance_table,caption=c("Differences in Means"),label=c("tab:diff:balance"),type = "latex",digits=c(0,0,3,3,3,3,3)), file = "~/Dropbox/DDR/Tables/NVA_Tables/diff_balance_c1.tex",include.rownames=FALSE)





###############################################
###############################################
###  Table C.2
###############################################
###############################################


#### Table 23-25 vs. 26--28
# subset data to men
data_diff <- data[data$female==0,]
# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>22,]
data_diff <- data_diff[data_diff$age_1962<29,]

# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)

# pick everyone at age=18
#data_diff <- data_diff[data_diff$age==18,]

# generate some more comparison vars
data_diff$economic_elite <- ifelse(data_diff$social_history_recode=="economic_elite",1,0)
data_diff$intelligence <- ifelse(data_diff$social_history_recode=="intelligence",1,0)
data_diff$worker_farmer <- ifelse(data_diff$social_history_recode=="worker_farmer",1,0)

data_diff$no_relatives_abroad <- ifelse(data_diff$relatives_abroad_recode=="no_relatives_abroad",1,0)
data_diff$no_returnee <- ifelse(data_diff$returnee_from_brd_recode=="no_returnee",1,0)
data_diff$father_kpd_sed <- ifelse(data_diff$father_party_before1945_recode=="KPD"|data_diff$father_party_before1945_recode=="SED",1,0)
data_diff$mother_kpd_sed <- ifelse(data_diff$mother_party_before1945_recode=="KPD"|data_diff$mother_party_before1945_recode=="SED",1,0)



data_diff_balance <- data_diff[,c("cohort_binary","party_sed","fdj_yes","org_count","number_of_vol_functions","education_rank","number_of_languages","no_relatives_abroad","no_returnee","father_kpd_sed","mother_kpd_sed","worker_farmer","intelligence","economic_elite")]
data_diff_balance <- na.omit(data_diff_balance)

balance_table <- balanceTable(data_diff_balance,treatment="cohort_binary")

varnames <- c("SED","FDJ","Organization Membership Count","Voluntary Functions","Education Rank","Number of Languages","No Relatives Abroad","No Returnee","Father KPD/SED","Mother KPD/SED","Worker/Farmer","Intelligentsia","Economic Elite")

balance_table <- data.frame(varnames,balance_table)

colnames(balance_table) <- c("Variable","Treated Mean","Control Mean","Std Difference","T-test p-value","Fisher/Wilcox p-value")

print(xtable(balance_table,caption=c("Differences in Means"),label=c("tab:diff:balance"),type = "latex",digits=c(0,0,3,3,3,3,3)), file = "~/Dropbox/DDR/Tables/NVA_Tables/diff_balance_c2.tex",include.rownames=FALSE)







#####################################################################
#####################################################################
## Appendix Table D.3
#####################################################################
#####################################################################

data$sector_factor <- factor(data$sector_cf)
data$year_factor <- factor(data$year)

# diff-in-diff, only adjacent male cohorts, with individual level FE
fmla1 <- as.formula(paste("salheiser_positions_correct~cohort_binary + post1962 + cohort_binary*post1962| year+sector_factor:time_trend+pkz|0|age_1962"))
fmla2 <- as.formula(paste("nc_dummy~cohort_binary + post1962 + cohort_binary*post1962| year+sector_factor:time_trend+pkz|0|age_1962"))

#####################################
# male 18-25 vs. male 26--33
#####################################
# subset data to men
data_diff <- data[data$female==0,]
# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>17,]
data_diff <- data_diff[data_diff$age_1962<34,]
# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)

m1 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m2 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])

# Get standard errors
se1 <- m1$cse
se2 <- m2$cse

#####################################
# male <26 vs. male older than 25
#####################################
# subset data to men
data_diff <- data[data$female==0,]

# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)

m3 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m4 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])

# Get standard errors
se3 <- m3$cse
se4 <- m4$cse

#####################################
# male 18--25 vs. female 18--25
#####################################
data_diff <- data
# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>17,]
data_diff <- data_diff[data_diff$age_1962<26,]
# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$female==0,1,0)

m5 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m6 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])

# Get standard errors
se5 <- m5$cse
se6 <- m6$cse


#####################################
# male 23-25 vs. male 26-28
#####################################
# subset data to men
data_diff <- data[data$female==0,]
# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>22,]
data_diff <- data_diff[data_diff$age_1962<29,]
# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)

m7 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m8 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])
# Get standard errors
se7 <- m7$cse
se8 <- m8$cse


#####################################
# Make Table
#####################################
# Table in .tex:
stargazer(m1,m2,m3,m4,m5,m6,m7,m8,type="latex",notes = "Standard errors are clustered at the age-cohort level.",
          style="qje", 
          title            = "Analysis of System Engagement",
          dep.var.caption  = "",
          dep.var.labels.include = FALSE,
          column.labels   = c("Rank","Nomenclature","Rank","Nomenclature","Rank","Nomenclature","Rank","Nomenclature"),
          se=list(se1,se2,se3,se4,se5,se6,se7,se8),
          add.lines = list(c("Treated","M 18-25","M 18-25","M <26","M <26","M 18-25","M 18-25","M 23-25","M 23-25"),
                           c("Control","M 26-33","M 26-33","M >25","M >25","F 18-25","F 18-25","M 26-28","M 26-28"),
                           c("Year FE","Yes","Yes","Yes","Yes","Yes","Yes","Yes","Yes"),
                           c("Sector FE","Yes","Yes","Yes","Yes","Yes","Yes","Yes","Yes"),
                           c("Sector Time Trends","Yes","Yes","Yes","Yes","Yes","Yes","Yes","Yes"),
                           c("Individual-Level FE","Yes","Yes","Yes","Yes","Yes","Yes","Yes","Yes")),
          digits=2,label="tab:diff:d3",keep=c(3),
          out="diff_in_diff_time_table_d3.tex"
)

models <- bind_rows(broom::tidy(m1) %>% mutate(model_name = "m1"),
                    
                    broom::tidy(m2) %>% mutate(model_name = "m2"),
                    
                    broom::tidy(m3) %>% mutate(model_name = "m3"),
                    
                    broom::tidy(m4) %>% mutate(model_name = "m4"),
                    
                    broom::tidy(m5) %>% mutate(model_name = "m5"),
                    
                    broom::tidy(m6) %>% mutate(model_name = "m6"),
                    
                    broom::tidy(m7) %>% mutate(model_name = "m7"),
                    
                    broom::tidy(m8) %>% mutate(model_name = "m8"))

save(models, file="table_D_3.Rdata")




#####################################################################
#####################################################################
## Appendix Table D.4
#####################################################################
#####################################################################

# sectorXyear effects
fmla1<- as.formula(paste("salheiser_positions_correct~cohort_binary + post1962 + cohort_binary*post1962| sector_factor:year_factor+pkz|0|age_1962"))
fmla2 <- as.formula(paste("nc_dummy~cohort_binary + post1962 + cohort_binary*post1962| sector_factor:year_factor+pkz|0|age_1962"))


#####################################
# male 18-25 vs. male 26--33
#####################################
# subset data to men
data_diff <- data[data$female==0,]
# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>17,]
data_diff <- data_diff[data_diff$age_1962<34,]
# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)

m1 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m2 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])


# Get standard errors
se1 <- m1$cse
se2 <- m2$cse

#####################################
# male <26 vs. male older than 25
#####################################
# subset data to men
data_diff <- data[data$female==0,]

# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)

m3 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m4 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])

# Get standard errors
se3 <- m3$cse
se4 <- m4$cse

#####################################
# male 18--25 vs. female 18--25
#####################################
data_diff <- data
# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>17,]
data_diff <- data_diff[data_diff$age_1962<26,]
# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$female==0,1,0)

m5 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m6 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])

# Get standard errors
se5 <- m5$cse
se6 <- m6$cse


#####################################
# male 23-25 vs. male 26-28
#####################################
# subset data to men
data_diff <- data[data$female==0,]
# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>22,]
data_diff <- data_diff[data_diff$age_1962<29,]
# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)

m7 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m8 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])
# Get standard errors
se7 <- m7$cse
se8 <- m8$cse

#####################################
# Make Table
#####################################
# Table in .tex:
stargazer(m1,m2,m3,m4,m5,m6,m7,m8,type="latex",notes = "Standard errors are clustered at the age-cohort level.",
          style="qje", 
          title            = "Analysis of System Engagement",
          dep.var.caption  = "",
          dep.var.labels.include = FALSE,
          column.labels   = c("Rank","Nomenclature","Rank","Nomenclature","Rank","Nomenclature","Rank","Nomenclature"),
          se=list(se1,se2,se3,se4,se5,se6,se7,se8),
          add.lines = list(c("Treated","M 18-25","M 18-25","M <26","M <26","M 18-25","M 18-25","M 23-25","M 23-25"),
                           c("Control","M 26-33","M 26-33","M >25","M >25","F 18-25","F 18-25","M 26-28","M 26-28"),
                           c("Sector FE:Year FE","Yes","Yes","Yes","Yes","Yes","Yes","Yes","Yes"),
                           c("Individual-Level FE","Yes","Yes","Yes","Yes","Yes","Yes","Yes","Yes")),
          digits=2,label="tab:diff:d4",
          out="diff_in_diff_time_tabled4.tex"
)

models <- bind_rows(broom::tidy(m1) %>% mutate(model_name = "m1"),
                    
                    broom::tidy(m2) %>% mutate(model_name = "m2"),
                    
                    broom::tidy(m3) %>% mutate(model_name = "m3"),
                    
                    broom::tidy(m4) %>% mutate(model_name = "m4"),
                    
                    broom::tidy(m5) %>% mutate(model_name = "m5"),
                    
                    broom::tidy(m6) %>% mutate(model_name = "m6"),
                    
                    broom::tidy(m7) %>% mutate(model_name = "m7"),
                    
                    broom::tidy(m8) %>% mutate(model_name = "m8"))

save(models, file="tabled4.Rdata")



#####################################################################
#####################################################################
## Appendix Table D.5
#####################################################################
#####################################################################

# diff-in-diff, only adjacent male cohorts, with individual level FE
fmla1 <- as.formula(paste("salheiser_positions_correct~cohort_binary + post1962 + cohort_binary*post1962 +time_trend*no_relatives_abroad+time_trend*no_returnee+time_trend*father_kpd_sed+time_trend*mother_kpd_sed+time_trend*worker_farmer+time_trend*intelligence+time_trend*economic_elite| year+sector_cf+pkz|0|age_1962"))
fmla2 <- as.formula(paste("nc_dummy~cohort_binary + post1962 + cohort_binary*post1962+time_trend*no_relatives_abroad+time_trend*no_returnee+time_trend*father_kpd_sed+time_trend*mother_kpd_sed+time_trend*worker_farmer+time_trend*intelligence+time_trend*economic_elite| year+sector_cf+pkz|0|age_1962"))

#####################################
# male 18-25 vs. male 26--33
#####################################
# subset data to men
data_diff <- data[data$female==0,]
# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>17,]
data_diff <- data_diff[data_diff$age_1962<34,]
# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)

m1 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m2 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])

# Get standard errors
se1 <- m1$cse
se2 <- m2$cse

#####################################
# male <26 vs. male older than 25
#####################################
# subset data to men
data_diff <- data[data$female==0,]

# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)

m3 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m4 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])

# Get standard errors
se3 <- m3$cse
se4 <- m4$cse

#####################################
# male 18--25 vs. female 18--25
#####################################
data_diff <- data
# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>17,]
data_diff <- data_diff[data_diff$age_1962<26,]
# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$female==0,1,0)

m5 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m6 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])

# Get standard errors
se5 <- m5$cse
se6 <- m6$cse


#####################################
# male 23-25 vs.male 26-28
#####################################
# subset data to men
data_diff <- data[data$female==0,]
# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>22,]
data_diff <- data_diff[data_diff$age_1962<29,]
# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)

m7 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,],exactDOF='rM')
m8 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,],exactDOF='rM')
# Get standard errors
se7 <- m7$cse
se8 <- m8$cse


#####################################
# Make Table
#####################################
# Table in .tex:
stargazer(m1,m2,m3,m4,m5,m6,m7,m8,type="latex",notes = "Standard errors are clustered at the age-cohort level.",
          style="qje", 
          title            = "Analysis of System Engagement",
          covariate.labels = c("Affected Cohort*Post-1961"),
          dep.var.caption  = "",
          dep.var.labels.include = FALSE,
          column.labels   = c("Rank","NK","Rank","NK","Rank","NK","Rank","NK"),
          se=list(se1,se2,se3,se4,se5,se6,se7,se8),
          add.lines = list(c("Treated","M 18-25","M 18-25","M <26","M <26","M 18-25","M 18-25","M <26","M 23-25"),
                           c("Control","M 26-33","M 26-33","M >25","M >25","F 18-25","F 18-25","M >25,F","M 26-28"),
                           c("Controls X Time Trend","Yes","Yes","Yes","Yes","Yes","Yes","Yes","Yes"),
                           c("Year FE","Yes","Yes","Yes","Yes","Yes","Yes","Yes","Yes"),
                           c("Sector FE","Yes","Yes","Yes","Yes","Yes","Yes","Yes","Yes"),
                           c("Individual-Level FE","Yes","Yes","Yes","Yes","Yes","Yes","Yes","Yes")),
          digits=2,label="tab:diff:d5",
          out="diff_in_diff_event_tabled5.tex"
)


models <- bind_rows(broom::tidy(m1) %>% mutate(model_name = "m1"),
                    
                    broom::tidy(m2) %>% mutate(model_name = "m2"),
                    
                    broom::tidy(m3) %>% mutate(model_name = "m3"),
                    
                    broom::tidy(m4) %>% mutate(model_name = "m4"),
                    
                    broom::tidy(m5) %>% mutate(model_name = "m5"),
                    
                    broom::tidy(m6) %>% mutate(model_name = "m6"),
                    
                    broom::tidy(m7) %>% mutate(model_name = "m7"),
                    
                    broom::tidy(m8) %>% mutate(model_name = "m8"))

save(models, file="tabled5.Rdata")



###############################################
###############################################
###  Appendix Figure E.3
###############################################
###############################################


# select men
data_model <- data_rd_25_26[data_rd_25_26$female==0,]

# plot for the treatment
pdf("~/Dropbox/DDR/Figures/NVA/rdd_treatment1.pdf")
rdplot(data_model$nva_dummy_ever, data_model$age_1962,c=25.5, binselect = "esmv", y.lim = c(0,0.8),x.label=c("Age in 1962"),y.label=c("P(NVA Service)"),title="NVA Service",col.lines="black",type.dots=1)
dev.off()


###############################################
###############################################
###  Appendix Table E.6
###############################################
###############################################
# select men
data_model <- data_rd_25_26[data_rd_25_26$female==0,]

running <- data_model$age_1962 - 25.5
w_low <- -1.5
w_high <- 1.5

# basic RD model salheiser
rd1 <- rdrandinf(Y=data_model$salheiser_position_at25, R=running,cutoff=0,fuzzy=data_model$nva_dummy_ever, wl=w_low,wr=w_high, seed = 50)
rd2 <- rdrandinf(Y=data_model$salheiser_position_at30, R=running,cutoff=0,fuzzy=data_model$nva_dummy_ever, wl=w_low,wr=w_high, seed = 50)
rd3 <- rdrandinf(Y=data_model$salheiser_position_at35, R=running,cutoff=0,fuzzy=data_model$nva_dummy_ever, wl=w_low,wr=w_high, seed = 50)
rd4 <- rdrandinf(Y=data_model$salheiser_position_at40, R=running,cutoff=0,fuzzy=data_model$nva_dummy_ever, wl=w_low,wr=w_high, seed = 50)
rd5 <- rdrandinf(Y=data_model$salheiser_position_at45, R=running,cutoff=0,fuzzy=data_model$nva_dummy_ever, wl=w_low,wr=w_high, seed = 50)
rd6 <- rdrandinf(Y=data_model$salheiser_position_at50, R=running,cutoff=0,fuzzy=data_model$nva_dummy_ever, wl=w_low,wr=w_high, seed = 50)

var <- c("Rank at 25","Rank at 30","Rank at 35","Rank at 40", "Rank at 45","Rank at 50")
treated <- c(rd1$sumstats[3,1],rd2$sumstats[3,1],rd3$sumstats[3,1],rd4$sumstats[3,1],rd5$sumstats[3,1],rd6$sumstats[3,1])
control <- c(rd1$sumstats[3,2],rd2$sumstats[3,2],rd3$sumstats[3,2],rd4$sumstats[3,2],rd5$sumstats[3,2],rd6$sumstats[3,2])
difference <- c(rd1$sumstats[3,1]-rd1$sumstats[3,2],rd2$sumstats[3,1]-rd2$sumstats[3,2],rd3$sumstats[3,1]-rd3$sumstats[3,2],rd4$sumstats[3,1]-rd4$sumstats[3,2],rd5$sumstats[3,1]-rd5$sumstats[3,2],rd6$sumstats[3,1]-rd6$sumstats[3,2])
pvalue <- c(rd1$p.value,rd2$p.value,rd3$p.value,rd4$p.value,rd5$p.value,rd6$p.value)

results <- data.frame(var,treated,control,difference,pvalue)
colnames(results) <- c("Outcome","Treated","Control","Difference","P-Value")
print(xtable(results,caption=c("RDD Estimates"),label=c("tab:rdd:e6"),type = "latex",digits=c(0,0,3,3,3,3)), file = "~/Dropbox/DDR/Tables/NVA_Tables/rdd_results_e6.tex",include.rownames=FALSE)


###############################################
###############################################
###  Appendix Table E.7
###############################################
###############################################
# select men
data_model <- data_rd_25_26[data_rd_25_26$female==0,]

running <- data_model$age_1962 - 25.5
w_low <- -1.5
w_high <- 1.5


# basic RD model nc_dummy
rd1 <- rdrandinf(Y=data_model$nc_dummy_at25, R=running,cutoff=0,fuzzy=data_model$nva_dummy_ever, wl=w_low,wr=w_high, seed = 50)
rd2 <- rdrandinf(Y=data_model$nc_dummy_at30, R=running,cutoff=0,fuzzy=data_model$nva_dummy_ever, wl=w_low,wr=w_high, seed = 50)
rd3 <- rdrandinf(Y=data_model$nc_dummy_at35, R=running,cutoff=0,fuzzy=data_model$nva_dummy_ever, wl=w_low,wr=w_high, seed = 50)
rd4 <- rdrandinf(Y=data_model$nc_dummy_at40, R=running,cutoff=0,fuzzy=data_model$nva_dummy_ever, wl=w_low,wr=w_high, seed = 50)
rd5 <- rdrandinf(Y=data_model$nc_dummy_at45, R=running,cutoff=0,fuzzy=data_model$nva_dummy_ever, wl=w_low,wr=w_high, seed = 50)
rd6 <- rdrandinf(Y=data_model$nc_dummy_at50, R=running,cutoff=0,fuzzy=data_model$nva_dummy_ever, wl=w_low,wr=w_high, seed = 50)

var <- c("Nomenclature at 25","Nomenclature at 30","Nomenclature at 35","Nomenclature at 40", "Nomenclature at 45","Nomenclature at 50")
treated <- c(rd1$sumstats[3,1],rd2$sumstats[3,1],rd3$sumstats[3,1],rd4$sumstats[3,1],rd5$sumstats[3,1],rd6$sumstats[3,1])
control <- c(rd1$sumstats[3,2],rd2$sumstats[3,2],rd3$sumstats[3,2],rd4$sumstats[3,2],rd5$sumstats[3,2],rd6$sumstats[3,2])
difference <- c(rd1$sumstats[3,1]-rd1$sumstats[3,2],rd2$sumstats[3,1]-rd2$sumstats[3,2],rd3$sumstats[3,1]-rd3$sumstats[3,2],rd4$sumstats[3,1]-rd4$sumstats[3,2],rd5$sumstats[3,1]-rd5$sumstats[3,2],rd6$sumstats[3,1]-rd6$sumstats[3,2])
pvalue <- c(rd1$p.value,rd2$p.value,rd3$p.value,rd4$p.value,rd5$p.value,rd6$p.value)

results <- data.frame(var,treated,control,difference,pvalue)
colnames(results) <- c("Outcome","Treated","Control","Difference","P-Value")
print(xtable(results,caption=c("RDD Estimates"),label=c("tab:rdd:e7"),type = "latex",digits=c(0,0,3,3,3,3)), file = "~/Dropbox/DDR/Tables/NVA_Tables/rdd_results_e7.tex",include.rownames=FALSE)


###############################################
###############################################
###  Appendix Table E.8
###############################################
###############################################
data_model <- data_rd_25[data_rd_25$female==1,]

# outcome plots for women without age 25
rdplot(data_model$salheiser_position_at40, data_model$age_1962,c=25.5, binselect = "esmv", y.lim = c(0,4),x.label=c("Age in 1962"),title=c("Rank"),col.lines="black",type.dots=1)
rdplot(data_model$nc_dummy_at50, data_model$age_1962,c=25.5, binselect = "esmv", y.lim = c(0,0.5),x.label=c("Age in 1962"),title=c("Nomenclatura"),col.lines="black",type.dots=1)
rdplot(data_model$party_sed_at50, data_model$age_1962,c=25.5, binselect = "esmv", y.lim = c(0,0.4),x.label=c("Age in 1962"),title=c("SED Membership"),col.lines="black",type.dots=1)
rdplot(data_model$org_count_at50, data_model$age_1962,c=25.5, binselect = "esmv", y.lim = c(0,4),x.label=c("Age in 1962"),title=c("Org Count"),col.lines="black",type.dots=1)
rdplot(data_model$number_of_vol_functions_at50, data_model$age_1962,c=25.5, binselect = "esmv", y.lim = c(0,6),x.label=c("Age in 1962"),title=c("Voluntary Functions"),col.lines="black",type.dots=1)
rdplot(data_model$sh_medals_at50, data_model$age_1962,c=25.5, binselect = "esmv", y.lim = c(0,150),x.label=c("Age in 1962"),title=c("SH Medals"),col.lines="black",type.dots=1)


# basic RD model salheiser
rd1 <- rdrandinf(Y=data_model$salheiser_position_at25, R=running,cutoff=0,fuzzy=data_model$nva_dummy_ever, wl=w_low,wr=w_high, seed = 50)
rd2 <- rdrandinf(Y=data_model$salheiser_position_at30, R=running,cutoff=0,fuzzy=data_model$nva_dummy_ever, wl=w_low,wr=w_high, seed = 50)
rd3 <- rdrandinf(Y=data_model$salheiser_position_at35, R=running,cutoff=0,fuzzy=data_model$nva_dummy_ever, wl=w_low,wr=w_high, seed = 50)
rd4 <- rdrandinf(Y=data_model$salheiser_position_at40, R=running,cutoff=0,fuzzy=data_model$nva_dummy_ever, wl=w_low,wr=w_high, seed = 50)
rd5 <- rdrandinf(Y=data_model$salheiser_position_at45, R=running,cutoff=0,fuzzy=data_model$nva_dummy_ever, wl=w_low,wr=w_high, seed = 50)
rd6 <- rdrandinf(Y=data_model$salheiser_position_at50, R=running,cutoff=0,fuzzy=data_model$nva_dummy_ever, wl=w_low,wr=w_high, seed = 50)

var <- c("Rank at 25","Rank at 30","Rank at 35","Rank at 40", "Rank at 45","Rank at 50")
treated <- c(rd1$sumstats[3,1],rd2$sumstats[3,1],rd3$sumstats[3,1],rd4$sumstats[3,1],rd5$sumstats[3,1],rd6$sumstats[3,1])
control <- c(rd1$sumstats[3,2],rd2$sumstats[3,2],rd3$sumstats[3,2],rd4$sumstats[3,2],rd5$sumstats[3,2],rd6$sumstats[3,2])
difference <- c(rd1$sumstats[3,1]-rd1$sumstats[3,2],rd2$sumstats[3,1]-rd2$sumstats[3,2],rd3$sumstats[3,1]-rd3$sumstats[3,2],rd4$sumstats[3,1]-rd4$sumstats[3,2],rd5$sumstats[3,1]-rd5$sumstats[3,2],rd6$sumstats[3,1]-rd6$sumstats[3,2])
pvalue <- c(rd1$p.value,rd2$p.value,rd3$p.value,rd4$p.value,rd5$p.value,rd6$p.value)

results <- data.frame(var,treated,control,difference,pvalue)
colnames(results) <- c("Outcome","Treated","Control","Difference","P-Value")
print(xtable(results,caption=c("RDD Estimates, Women, Only Excluding Age 25"),label=c("tab:rdd:sal:e8"),type = "latex",digits=c(0,0,3,3,3,3)), file = "~/Dropbox/DDR/Tables/NVA_Tables/rdd_results_salheiser_e8.tex",include.rownames=FALSE)


###############################################
###############################################
###  Appendix Table E.8
###############################################
###############################################
data_model <- data_rd_25[data_rd_25$female==1,]

# basic RD model nc_dummy
rd1 <- rdrandinf(Y=data_model$nc_dummy_at25, R=running,cutoff=0,fuzzy=data_model$nva_dummy_ever, wl=w_low,wr=w_high, seed = 50)
rd2 <- rdrandinf(Y=data_model$nc_dummy_at30, R=running,cutoff=0,fuzzy=data_model$nva_dummy_ever, wl=w_low,wr=w_high, seed = 50)
rd3 <- rdrandinf(Y=data_model$nc_dummy_at35, R=running,cutoff=0,fuzzy=data_model$nva_dummy_ever, wl=w_low,wr=w_high, seed = 50)
rd4 <- rdrandinf(Y=data_model$nc_dummy_at40, R=running,cutoff=0,fuzzy=data_model$nva_dummy_ever, wl=w_low,wr=w_high, seed = 50)
rd5 <- rdrandinf(Y=data_model$nc_dummy_at45, R=running,cutoff=0,fuzzy=data_model$nva_dummy_ever, wl=w_low,wr=w_high, seed = 50)
rd6 <- rdrandinf(Y=data_model$nc_dummy_at50, R=running,cutoff=0,fuzzy=data_model$nva_dummy_ever, wl=w_low,wr=w_high, seed = 50)

var <- c("Nomenclature at 25","Nomenclature at 30","Nomenclature at 35","Nomenclature at 40", "Nomenclature at 45","Nomenclature at 50")
treated <- c(rd1$sumstats[3,1],rd2$sumstats[3,1],rd3$sumstats[3,1],rd4$sumstats[3,1],rd5$sumstats[3,1],rd6$sumstats[3,1])
control <- c(rd1$sumstats[3,2],rd2$sumstats[3,2],rd3$sumstats[3,2],rd4$sumstats[3,2],rd5$sumstats[3,2],rd6$sumstats[3,2])
difference <- c(rd1$sumstats[3,1]-rd1$sumstats[3,2],rd2$sumstats[3,1]-rd2$sumstats[3,2],rd3$sumstats[3,1]-rd3$sumstats[3,2],rd4$sumstats[3,1]-rd4$sumstats[3,2],rd5$sumstats[3,1]-rd5$sumstats[3,2],rd6$sumstats[3,1]-rd6$sumstats[3,2])
pvalue <- c(rd1$p.value,rd2$p.value,rd3$p.value,rd4$p.value,rd5$p.value,rd6$p.value)

results <- data.frame(var,treated,control,difference,pvalue)
colnames(results) <- c("Outcome","Treated","Control","Difference","P-Value")
print(xtable(results,caption=c("RDD Estimates, Women, Only Excluding Age 25"),label=c("tab:rdd:nc:e9"),type = "latex",digits=c(0,0,3,3,3,3)), file = "~/Dropbox/DDR/Tables/NVA_Tables/rdd_results_nc_dummy_e9.tex",include.rownames=FALSE)





#####################################################################
#####################################################################
## Appendix Table F.10
#####################################################################
#####################################################################

# diff-in-diff, only adjacent male cohorts, with individual level FE
fmla1 <- as.formula(paste("salheiser_positions_correct~cohort_binary + post1962 + cohort_binary*post1962| year+pkz|0|age_1962"))
fmla2 <- as.formula(paste("nc_dummy~cohort_binary + post1962 + cohort_binary*post1962| year+pkz|0|age_1962"))

#####################################
# male 18-25 vs. male 26--33
#####################################
# subset data to men
data_diff <- data[data$female==0,]
# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>17,]
data_diff <- data_diff[data_diff$age_1962<34,]
# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)

m1 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m2 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])

# Get standard errors
se1 <- m1$cse
se2 <- m2$cse

#####################################
# male <26 vs. male older than 25
#####################################
# subset data to men
data_diff <- data[data$female==0,]

# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)

m3 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m4 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])

# Get standard errors
se3 <- m3$cse
se4 <- m4$cse

#####################################
# male 18--25 vs. female 18--25
#####################################
data_diff <- data
# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>17,]
data_diff <- data_diff[data_diff$age_1962<26,]
# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$female==0,1,0)

m5 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m6 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])

# Get standard errors
se5 <- m5$cse
se6 <- m6$cse


#####################################
# male 23-25 vs.male 26-28
#####################################
# subset data to men
data_diff <- data[data$female==0,]
# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>22,]
data_diff <- data_diff[data_diff$age_1962<29,]
# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)

m7 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,],exactDOF='rM')
m8 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,],exactDOF='rM')
# Get standard errors
se7 <- m7$cse
se8 <- m8$cse


#####################################
# Make Table
#####################################
# Table in .tex:
stargazer(m1,m2,m3,m4,m5,m6,m7,m8,type="latex",notes = "Standard errors are clustered at the age-cohort level.",
          style="qje", 
          title            = "Analysis of System Engagement",
          covariate.labels = c("Affected Cohort*Post-1961"),
          dep.var.caption  = "",
          dep.var.labels.include = FALSE,
          column.labels   = c("Rank","Nomenclature","Rank","Nomenclature","Rank","Nomenclature","Rank","Nomenclature"),
          se=list(se1,se2,se3,se4,se5,se6,se7,se8),
          add.lines = list(c("Treated","M 18-25","M 18-25","M <26","M <26","M 18-25","M 18-25","M <26","M 23-25"),
                           c("Control","M 26-33","M 26-33","M >25","M >25","F 18-25","F 18-25","M >25,F","M 26-28"),
                           c("Year FE","Yes","Yes","Yes","Yes","Yes","Yes","Yes","Yes"),
                           c("Sector FE","No","No","No","No","No","No","No","No"),
                           c("Individual-Level FE","Yes","Yes","Yes","Yes","Yes","Yes","Yes","Yes")),
          digits=2,label="tab:diff:f10",keep=c(3),
          out="diff_in_diff_main_table_f10.tex"
)

models <- bind_rows(broom::tidy(m1) %>% mutate(model_name = "m1"),
                    
                    broom::tidy(m2) %>% mutate(model_name = "m2"),
                    
                    broom::tidy(m3) %>% mutate(model_name = "m3"),
                    
                    broom::tidy(m4) %>% mutate(model_name = "m4"),
                    
                    broom::tidy(m5) %>% mutate(model_name = "m5"),
                    
                    broom::tidy(m6) %>% mutate(model_name = "m6"),
                    
                    broom::tidy(m7) %>% mutate(model_name = "m7"),
                    
                    broom::tidy(m8) %>% mutate(model_name = "m8"))

save(models, file="table_f10.Rdata")

#####################################################################
#####################################################################
## Appendix Table F.11
#####################################################################
#####################################################################


# define diff-in-diff time slices
data$diff_in_diff <- ifelse(data$year==1989 | data$year==1960,1,0)

# diff-in-diff, only adjacent male cohorts, with individual level FE
fmla1 <- as.formula(paste("salheiser_positions_correct~cohort_binary + post1962 + cohort_binary*post1962|0 |0|pkz"))
fmla2 <- as.formula(paste("nc_dummy~cohort_binary + post1962 + cohort_binary*post1962| 0|0|pkz"))

#####################################
# male 18-25 vs. male 26--33
#####################################
# subset data to men
data_diff <- data[data$female==0,]
# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>17,]
data_diff <- data_diff[data_diff$age_1962<34,]
# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)

# subset to diff-in-diff slices
data_diff <- data_diff[data_diff$diff_in_diff==1,]


m1 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m2 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])

# Get standard errors
se1 <- m1$cse
se2 <- m2$cse

#####################################
# male <26 vs. male older than 25
#####################################
# subset data to men
data_diff <- data[data$female==0,]

# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)

# subset to diff-in-diff slices
data_diff <- data_diff[data_diff$diff_in_diff==1,]


m3 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m4 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])

# Get standard errors
se3 <- m3$cse
se4 <- m4$cse

#####################################
# male 18--25 vs. female 18--25
#####################################
data_diff <- data
# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>17,]
data_diff <- data_diff[data_diff$age_1962<26,]
# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$female==0,1,0)

# subset to diff-in-diff slices
data_diff <- data_diff[data_diff$diff_in_diff==1,]

m5 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m6 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])

# Get standard errors
se5 <- m5$cse
se6 <- m6$cse


#####################################
# male <26 vs. female or male >25
#####################################
# subset data to men
data_diff <- data[data$female==0,]


# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>17,]
data_diff <- data_diff[data_diff$age_1962<34,]
# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)

# subset to diff-in-diff slices
data_diff <- data_diff[data_diff$diff_in_diff==1,]


m7 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m8 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])
# Get standard errors
se7 <- m7$cse
se8 <- m8$cse


#####################################
# Make Table
#####################################
# Table in .tex:
stargazer(m1,m2,m3,m4,m5,m6,m7,m8,type="latex",notes = "Standard errors are clustered at the individual level.",
          style="qje", 
          title            = "Analysis of System Engagement",
          covariate.labels = c("Constant",
                               "Affected Cohort",
                               "Post-1961",
                               "Affected Cohort*Post-1961"),
          dep.var.caption  = "",
          dep.var.labels.include = FALSE,
          column.labels   = c("Rank","NK","Rank","NK","Rank","NK","Rank","NK"),
          se=list(se1,se2,se3,se4,se5,se6,se7,se8),
          add.lines = list(c("Treated","M 18-25","M 18-25","M <26","M <26","M 18-25","M 18-25","M 23-25","M 23-25"),
                           c("Control","M 26-33","M 26-33","M >25","M >25","F 18-25","F 18-25","M 26-28","M 26-28"),
                           c("Year FE","Yes","Yes","Yes","Yes","Yes","Yes","Yes","Yes"),
                           c("Sector FE","Yes","Yes","Yes","Yes","Yes","Yes","Yes","Yes"),
                           c("Individual-Level FE","Yes","Yes","Yes","Yes","Yes","Yes","Yes","Yes")),
          digits=2,label="tab:diff:f11",
          out="diff_in_diff_f11.tex"
)

models <- bind_rows(broom::tidy(m1) %>% mutate(model_name = "m1"),
                    
                    broom::tidy(m2) %>% mutate(model_name = "m2"),
                    
                    broom::tidy(m3) %>% mutate(model_name = "m3"),
                    
                    broom::tidy(m4) %>% mutate(model_name = "m4"),
                    
                    broom::tidy(m5) %>% mutate(model_name = "m5"),
                    
                    broom::tidy(m6) %>% mutate(model_name = "m6"),
                    
                    broom::tidy(m7) %>% mutate(model_name = "m7"),
                    
                    broom::tidy(m8) %>% mutate(model_name = "m8"))

save(models, file="table_f11.Rdata")


#####################################################################
#####################################################################
## Appendix Table F.12
#####################################################################
#####################################################################
# diff-in-diff, only adjacent male cohorts, with individual level FE
fmla1 <- as.formula(paste("salheiser_positions_correct~cohort_binary + post_age + cohort_binary*post_age| year+sector_cf+pkz|0|age_1962"))
fmla2 <- as.formula(paste("nc_dummy~cohort_binary + post_age + cohort_binary*post_age| year+sector_cf+pkz|0|age_1962"))

# subset data to men
data_diff <- data[data$female==0,]
# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>17,]
data_diff <- data_diff[data_diff$age_1962<34,]
# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)
data_diff$post_age <- ifelse(data_diff$age>25,1,0)

m1 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m2 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])

# Get standard errors
se1 <- m1$cse
se2 <- m2$cse

# Table in .tex:
stargazer(m1,m2,type="latex",notes = "Standard errors are clustered at the age-cohort level.",
          style="qje", 
          title            = "Analysis of System Engagement",
          covariate.labels = c("Affected Cohort*Post-1961"),
          dep.var.caption  = "",
          dep.var.labels.include = FALSE,
          column.labels   = c("Rank","Nomenclature"),
          se=list(se1,se2),
          add.lines = list(c("Treated","M 18-25","M 18-25"),
                           c("Control","M 26-33","M 26-33"),
                           c("Year FE","Yes","Yes"),
                           c("Sector FE","Yes","Yes"),
                           c("Individual-Level FE","Yes","Yes")),
          digits=2,label="tab:diff:f12",keep=c(3),
          out="diff_in_diff_age_table_f12.tex"
)

models <- bind_rows(broom::tidy(m1) %>% mutate(model_name = "m1"),
                    
                    broom::tidy(m2) %>% mutate(model_name = "m2"))

save(models, file="table_f12.Rdata")



#####################################################################
#####################################################################
## Appendix Table F.13
#####################################################################
#####################################################################

# diff-in-diff, only adjacent male cohorts, with individual level FE
fmla1 <- as.formula(paste("salheiser_positions_correct~cohort_binary + post1962 + cohort_binary*post1962| year+sector_cf+pkz|0|age_1962"))
fmla2 <- as.formula(paste("nc_dummy~cohort_binary + post1962 + cohort_binary*post1962| year+sector_cf+pkz|0|age_1962"))

#####################################
# female 18-25 vs. female 26--33
#####################################
# subset data to men
data_diff <- data[data$female==1,]
# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>17,]
data_diff <- data_diff[data_diff$age_1962<34,]
# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)

m1 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m2 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])

# Get standard errors
se1 <- m1$cse
se2 <- m2$cse

#####################################
# female <26 vs. female older than 25
#####################################
# subset data to men
data_diff <- data[data$female==1,]

# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)

m3 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m4 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])

# Get standard errors
se3 <- m3$cse
se4 <- m4$cse


#####################################
# female 23-25 vs. female 26-28
#####################################
# subset data to men
data_diff <- data[data$female==1,]
# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>22,]
data_diff <- data_diff[data_diff$age_1962<29,]
# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)

m5 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,],exactDOF='rM')
m6 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,],exactDOF='rM')
# Get standard errors
se5 <- m5$cse
se6 <- m6$cse

#####################################
# Make Table
#####################################
# Table in .tex:
stargazer(m1,m2,m3,m4,m5,m6,type="latex",notes = "Standard errors are clustered at the age-cohort level.",
          style="qje", 
          title            = "Analysis of System Engagement",
          covariate.labels = c("Affected Cohort*Post-1961"),
          dep.var.caption  = "",
          dep.var.labels.include = FALSE,
          column.labels   = c("Rank","Nomenclature","Rank","Nomenclature","Rank","Nomenclature"),
          se=list(se1,se2,se3,se4,se5,se6),
          add.lines = list(c("Treated","F 18-25","F 18-25","F <26","F <26","F 23-25","F 23-25"),
                           c("Control","F 26-33","F 26-33","F >25","F >25","F 26-28","F 26-28"),
                           c("Year FE","Yes","Yes","Yes","Yes","Yes","Yes"),
                           c("Sector FE","Yes","Yes","Yes","Yes","Yes","Yes"),
                           c("Individual-Level FE","Yes","Yes","Yes","Yes","Yes","Yes")),
          digits=2,label="tab:diff:f13",keep=c(3),
          out="diff_in_diff_placebo_table_f13.tex"
)




models <- bind_rows(broom::tidy(m1) %>% mutate(model_name = "m1"),
                    
                    broom::tidy(m2) %>% mutate(model_name = "m2"),
                    
                    broom::tidy(m3) %>% mutate(model_name = "m3"),
                    
                    broom::tidy(m4) %>% mutate(model_name = "m4"),
                    
                    broom::tidy(m3) %>% mutate(model_name = "m5"),
                    
                    broom::tidy(m4) %>% mutate(model_name = "m6"))

save(models, file="table_f13.Rdata")



#####################################################################
#####################################################################
## Appendix Section G.1
#####################################################################
#####################################################################

# diff-in-diff, only adjacent male cohorts, with individual level FE
fmla1 <- as.formula(paste("education_rank~cohort_binary + post1962 + cohort_binary*post1962| year+sector_cf+pkz|0|age_1962"))
fmla2 <- as.formula(paste("special_knowledge_dummy~cohort_binary + post1962 + cohort_binary*post1962| year+sector_cf+pkz|0|age_1962"))
fmla3 <- as.formula(paste("additional_ed_duration~cohort_binary + post1962 + cohort_binary*post1962| year+sector_cf+pkz|0|age_1962"))
fmla4 <- as.formula(paste("degree_in_ussr~cohort_binary + post1962 + cohort_binary*post1962| year+sector_cf+pkz|0|age_1962"))


#####################################
# male 18-25 vs. male 26--33
#####################################
# subset data to men
data_diff <- data[data$female==0,]

#remove Berufssoldaten
data_diff <- data_diff[data_diff$military_service_duration_in_years<=3,]


# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>17,]
data_diff <- data_diff[data_diff$age_1962<34,]
# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)

m1 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m2 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])
m3 <- felm(fmla3, data = data_diff[data_diff$working_sample==1,])
m4 <- felm(fmla4, data = data_diff[data_diff$working_sample==1,])

# Get standard errors
se1 <- m1$cse
se2 <- m2$cse
se3 <- m3$cse
se4 <- m4$cse

#####################################
# Make Table
#####################################
# Table in .tex:
stargazer(m1,m2,m3,m4,type="latex",notes = "Standard errors are clustered at the age-cohort level.",
          style="qje", 
          title            = "Analysis of System Engagement",
          covariate.labels = c("Affected Cohort*Post-1961"),
          dep.var.caption  = "",
          dep.var.labels.include = FALSE,
          column.labels   = c("Education","Special Knowledge","Additional Education","Degree USSR"),
          se=list(se1,se2,se3,se4),
          add.lines = list(c("Treated","M 18-25","M 18-25","M 18-25","M 18-25"),
                           c("Control","M 26-33","M 26-33","M 26-33","M 26-33"),
                           c("Year FE","Yes","Yes","Yes","Yes"),
                           c("Sector FE","Yes","Yes","Yes","Yes"),
                           c("Individual-Level FE","Yes","Yes","Yes","Yes")),
          digits=2,label="tab:diff:g1",keep=c(3),
          out="diff_in_diff_g1.tex"
)

models <- bind_rows(broom::tidy(m1) %>% mutate(model_name = "m1"),
                    
                    broom::tidy(m2) %>% mutate(model_name = "m2"),
                    
                    broom::tidy(m3) %>% mutate(model_name = "m3"),
                    
                    broom::tidy(m4) %>% mutate(model_name = "m4"))

# save(models, file="table_g1.Rdata")


# make figure from saved model estimates
# Figure: Education Coefplot  ------------------------------------------------

education_results <- models %>% 
  filter(!is.na(estimate)) %>% 
  mutate(term = c("Education",  "Special Knowledge (dummy)", "Additional Education","Degree USSR")) %>% 
  mutate(term = forcats::fct_reorder(term, estimate))

education_ccdb_coefplot <- ggplot(education_results, 
                                  aes(x = term, y =estimate)) + 
  geom_point(size = 1) + 
  geom_errorbar(aes(ymin = estimate - 2 * std.error, 
                    ymax = estimate + 2 * std.error), width = 0) + 
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme_bw() +
  labs(y = "Coefficient",
       x = "Dependent Variables", 
       subtitle = "Indep. Variable: Post 1962 x Affected Cohort", 
       caption = "N = 2,226,243") +
  scale_x_discrete(labels = c("Additional\nEducation\n(months)", 
                              "Special\nKnowledge\n(Dummy)", 
                              "Degree\n USSR", 
                              "Education\n(0-4)")) +
  theme(panel.grid = element_blank(), 
        plot.title = element_text(hjust = 0.5), 
        plot.subtitle = element_text(size = 8), 
        axis.text.x = element_text(size = 7))

ggsave(education_ccdb_coefplot, filename = "./Figures/NVA/educ_mechanism_plot.pdf", 
       width = 4.5, height = 4, scale = 0.95)


ggsave(education_ccdb_coefplot, filename = "./Figures/NVA/educ_mechanism_plot.pdf", 
       width = 4.5, height = 4, scale = 0.95)




#####################################################################
#####################################################################
## Appendix Table G.14
#####################################################################
#####################################################################


# diff-in-diff, only adjacent male cohorts, with individual level FE
fmla1 <- as.formula(paste("salheiser_positions_correct~cohort_binary + post1962 + cohort_binary*post1962 +abi_in_POS_cohort + cohort_binary*abi_in_POS_cohort + post1962*abi_in_POS_cohort+ cohort_binary*post1962*abi_in_POS_cohort| year+sector_cf+pkz|0|age_1962"))
fmla2 <- as.formula(paste("nc_dummy~cohort_binary + post1962 + cohort_binary*post1962+ abi_in_POS_cohort + cohort_binary*abi_in_POS_cohort + post1962*abi_in_POS_cohort+ cohort_binary*post1962*abi_in_POS_cohort| year+sector_cf+pkz|0|age_1962"))
fmla3 <- as.formula(paste("salheiser_positions_correct~cohort_binary + post1962 + cohort_binary*post1962 +POS_in_abi_cohort + cohort_binary*POS_in_abi_cohort + post1962*POS_in_abi_cohort+ cohort_binary*post1962*POS_in_abi_cohort| year+sector_cf+pkz|0|age_1962"))
fmla4 <- as.formula(paste("nc_dummy~cohort_binary + post1962 + cohort_binary*post1962+ POS_in_abi_cohort + cohort_binary*POS_in_abi_cohort + post1962*POS_in_abi_cohort+ cohort_binary*post1962*POS_in_abi_cohort| year+sector_cf+pkz|0|age_1962"))



#####################################
# only men that served, male 18-25 vs. male 26--33
#####################################
# subset data to men
data_diff <- data[data$female==0,]
data_diff <- data_diff[data_diff$nva_18_info==1,]

# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>17,]
data_diff <- data_diff[data_diff$age_1962<34,]
# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)

m1 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m2 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])
m3 <- felm(fmla3, data = data_diff[data_diff$working_sample==1,])
m4 <- felm(fmla4, data = data_diff[data_diff$working_sample==1,])


# Get standard errors
se1 <- m1$cse
se2 <- m2$cse
se3 <- m3$cse
se4 <- m4$cse


#####################################
# Make Table
#####################################
# Table in .tex:
stargazer(m1,m2,m3,m4,type="latex",notes = "Standard errors are clustered at the age-cohortl level.",
          style="qje", 
          title            = "Analysis of System Engagement",
          dep.var.caption  = "",
          covariate.labels = c("Affected Cohort",
                               "Post-1961",
                               "Abi in POS-Cohort",
                               "POS in Abi-Cohort",
                               "Affected Cohort*Post-1961",
                               "Affected Cohort*Abi in POS-Cohort",
                               "Post-1961*Abi in POS-Cohort",
                               "Affected Cohort*Abi in POS-Cohort*Post-1961",
                               "Affected Cohort*POS in Abi-Cohort",
                               "Post-1961*POS in Abi-Cohort",
                               "Affected Cohort*POS in Abi-Cohort*Post-1961"),
          dep.var.labels.include = FALSE,
          column.labels   = c("Rank","NK","Rank","NK"),
          se=list(se1,se2,se3,se4),
          add.lines = list(c("Treated","M 18-25","M 18-25","M 18-25","M 18-25"),
                           c("Control","M 26-33","M 26-33","M 26-33","M 26-33"),
                           c("Year FE","Yes","Yes","Yes","Yes"),
                           c("Sector FE","Yes","Yes","Yes","Yes"),
                           c("Individual-Level FE","Yes","Yes","Yes","Yes")),
          digits=2,label="tab:diff:g14",
          out="diff_in_diff_abi_table_g14.tex"
)

models <- bind_rows(broom::tidy(m1) %>% mutate(model_name = "m1"),
                    
                    broom::tidy(m2) %>% mutate(model_name = "m2"),
                    
                    broom::tidy(m3) %>% mutate(model_name = "m3"),
                    
                    broom::tidy(m4) %>% mutate(model_name = "m4"))

save(models, file="table_g14.Rdata")





#####################################################################
#####################################################################
## Appendix Table G.15
#####################################################################
#####################################################################

# diff-in-diff, only adjacent male cohorts, with individual level FE
fmla1 <- as.formula(paste("salheiser_positions_correct~cohort_binary + post1962 + cohort_binary*post1962 | year+sector_cf+pkz|0|age_1962"))
fmla2 <- as.formula(paste("nc_dummy~cohort_binary + post1962 + cohort_binary*post1962| year+sector_cf+pkz|0|age_1962"))

#####################################
# only men that served, male 18-25 vs. male 26--33
#####################################
# subset data to men
data_diff <- data[data$female==0,]
data_diff <- data_diff[data_diff$nva_dummy_ever==1,]

# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>17,]
data_diff <- data_diff[data_diff$age_1962<34,]
# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)

m1 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m2 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])

# Get standard errors
se1 <- m1$cse
se2 <- m2$cse

#####################################
# only men that served, male <26 vs. male older than 25
#####################################
# subset data to men
data_diff <- data[data$female==0,]
data_diff <- data_diff[data_diff$nva_dummy_ever==1,]

# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)

m3 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m4 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])

# Get standard errors
se3 <- m3$cse
se4 <- m4$cse


#####################################
# Make Table
#####################################
# Table in .tex:
stargazer(m1,m2,m3,m4,type="latex",notes = "Standard errors are clustered at the age-cohort level.",
          style="qje", 
          title            = "Analysis of System Engagement",
          covariate.labels = c("Affected Cohort*Post-1961"),
          dep.var.caption  = "",
          dep.var.labels.include = FALSE,
          column.labels   = c("Rank","NK","Rank","NK"),
          se=list(se1,se2,se3,se4),
          add.lines = list(c("Treated","M 18-25","M 18-25","M <26","M <26"),
                           c("Control","M 26-33","M 26-33","M >25","M >25"),
                           c("Year FE","Yes","Yes","Yes","Yes"),
                           c("Sector FE","Yes","Yes","Yes","Yes"),
                           c("Individual-Level FE","Yes","Yes","Yes","Yes")),
          digits=2,label="tab:diff:g15",keep=c(3),
          out="diff_in_diff_g15.tex"
)

models <- bind_rows(broom::tidy(m1) %>% mutate(model_name = "m1"),
                    
                    broom::tidy(m2) %>% mutate(model_name = "m2"),
                    
                    broom::tidy(m3) %>% mutate(model_name = "m3"),
                    
                    broom::tidy(m4) %>% mutate(model_name = "m4"))

save(models, file="table_g15.Rdata")




